#!/usr/bin/env python
# coding: utf-8

# In[ ]:


#pip install openai
import pandas as pd
import os
import openai
import time
import re


# In[ ]:


# Directory that holds the input CSV and receives every CSV written by this script.
path='/Users/meysam/Documents/UZH/ChatGPT/Task1/'


# Keep only the rows whose 'relevant' label is present and store it as an integer.
df=pd.read_csv(path+'training_data_reduced.csv')
df=df.dropna(subset=['relevant'])
df['relevant']=df['relevant'].astype(int)
# Persist the cleaned frame; evaluate() reads this file back.
df.to_csv(path+'training_data_reduced_readytouse.csv')
# Tweets to be labelled, in the same row order as the saved frame.
my_text=df['text'].tolist()
# A bare expression is only echoed inside a notebook cell; print the class
# balance explicitly so it is also shown when the file runs as a script.
print(df['relevant'].value_counts())


# In[ ]:


def run_chatgpt_default(my_text,k):
    """Label every tweet in ``my_text`` with gpt-3.5-turbo at its default temperature.

    Each tweet is sent in its own chat-completion request, with the
    content-moderation coding instructions as the system message. The raw
    replies are collected, written to ``path + 'label_task1_round_<k>.csv'``
    and returned.

    Args:
        my_text: List of tweet texts (strings) to classify.
        k: Round identifier; only used to build the output file name.

    Returns:
        DataFrame with a ``text`` column (the tweets, in input order) and a
        ``label`` column (the model's raw reply for each tweet).
    """
    # Prefer a key supplied through the environment so the secret does not
    # have to live in the file; otherwise fall back to the placeholder.
    openai.api_key = os.environ.get("OPENAI_API_KEY", "YOUR KEY")

    label = []
    text_list = []
    # Coding instructions sent as the system message. Written as adjacent
    # literals (one per paragraph) so no raw line break sits inside a string.
    content = (
        "In this job, you will be shown a sample of Tweets collected from the social media platform Twitter. Your task will be to determine if the Tweets have to do with “content” moderation” or not. \n "
        "“Content moderation” refers to the practice of screening and monitoring content posted by users on social media sites to determine if the content should be published or not, based on specific rules and guidelines. \n "
        "Every time someone posts something on a platform like Facebook or Twitter, that piece of content goes through a review process (‘content moderation’) to ensure that it is not illegal, hateful or inappropriate and that it complies with the rules of the site. When that is not the case, that piece of content can be removed, flagged, labelled as or ‘disputed’. \n "
        "Deciding what should be allowed on social media is not always easy. For example, many sites ban child pornography and terrorist content as it is illegal. However, things are less clear when it comes to content about the safety of vaccines or politics, for example. Even when people agree that some content should be blocked, they do not always agree about the best way to do so, about how effective it is and who should do it (the government or private companies, human moderators or artificial intelligence). \n "
        "For each tweet in the sample: Carefully read the text of the Tweet, paying close attention to details. Classify the Tweet as either irrelevant (0) or relevant (1). \n "
        "Tweets should be coded as relevant when they directly relate to content moderation. This includes Tweets that discuss social media platforms’ content moderation rules and practices, and Tweets that discuss governments’ regulation of online content moderation. \n"
        "This also includes Tweets that discuss mild forms of content moderation, like flagging Tweets and Tweets when they indirectly relate to content moderation.\n "
        "Tweets should be coded as irrelevant if they do not refer to content moderation or if they are themselves examples of moderated content. This would include, for example, a Tweet by Donald Trump that Twitter has labelled as ‘disputed’, a Tweet claiming that something is false, or a Tweet containing sensitive content. just label 'relevant' or 'irrelevant' without any more explanation "
    )

    # enumerate yields the true position of each tweet; list.index() rescans
    # the list every iteration and returns the first match for duplicates.
    for i, text in enumerate(my_text):
        print(i)

        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": content},
                {
                    "role": "user",
                    "content": " here's the tweet I picked, please label it as 'Relevant' or 'Irrelevant':" + text,
                },
            ],
        )
        reply = completion.choices[0].message.content
        label.append(reply)
        text_list.append(text)
        print(reply)

    df_label = pd.DataFrame()
    df_label['text'] = text_list
    df_label['label'] = label

    # Keep a copy of the raw replies on disk, one file per round.
    df_label.to_csv(path+'label_task1_round_{0}.csv'.format(k))
    return df_label


# In[ ]:


def run_chatgpt_temperature(my_text,k,temperature=0.2):
    """Label every tweet in ``my_text`` with gpt-3.5-turbo at a fixed temperature.

    Each tweet is sent in its own chat-completion request, with the
    content-moderation coding instructions as the system message. The raw
    replies are collected, written to ``path + 'label_task1_round_<k>.csv'``
    and returned.

    Args:
        my_text: List of tweet texts (strings) to classify.
        k: Round identifier; only used to build the output file name.
        temperature: Sampling temperature passed to the API. Defaults to 0.2,
            the value this function previously hard-coded.

    Returns:
        DataFrame with a ``text`` column (the tweets, in input order) and a
        ``label`` column (the model's raw reply for each tweet).
    """
    # Prefer a key supplied through the environment so the secret does not
    # have to live in the file; otherwise fall back to the placeholder.
    openai.api_key = os.environ.get("OPENAI_API_KEY", "YOUR KEY")

    label = []
    text_list = []

    # Coding instructions sent as the system message. Written as adjacent
    # literals (one per paragraph) so no raw line break sits inside a string.
    content = (
        "In this job, you will be shown a sample of Tweets collected from the social media platform Twitter. Your task will be to determine if the Tweets have to do with “content” moderation” or not. \n "
        "“Content moderation” refers to the practice of screening and monitoring content posted by users on social media sites to determine if the content should be published or not, based on specific rules and guidelines. \n "
        "Every time someone posts something on a platform like Facebook or Twitter, that piece of content goes through a review process (‘content moderation’) to ensure that it is not illegal, hateful or inappropriate and that it complies with the rules of the site. When that is not the case, that piece of content can be removed, flagged, labelled as or ‘disputed’. \n "
        "Deciding what should be allowed on social media is not always easy. For example, many sites ban child pornography and terrorist content as it is illegal. However, things are less clear when it comes to content about the safety of vaccines or politics, for example. Even when people agree that some content should be blocked, they do not always agree about the best way to do so, about how effective it is and who should do it (the government or private companies, human moderators or artificial intelligence). \n "
        "For each tweet in the sample: Carefully read the text of the Tweet, paying close attention to details. Classify the Tweet as either irrelevant (0) or relevant (1). \n "
        "Tweets should be coded as relevant when they directly relate to content moderation. This includes Tweets that discuss social media platforms’ content moderation rules and practices, and Tweets that discuss governments’ regulation of online content moderation. \n"
        "This also includes Tweets that discuss mild forms of content moderation, like flagging Tweets and Tweets when they indirectly relate to content moderation.\n "
        "Tweets should be coded as irrelevant if they do not refer to content moderation or if they are themselves examples of moderated content. This would include, for example, a Tweet by Donald Trump that Twitter has labelled as ‘disputed’, a Tweet claiming that something is false, or a Tweet containing sensitive content. just label 'relevant' or 'irrelevant' without any more explanation "
    )

    # enumerate yields the true position of each tweet; list.index() rescans
    # the list every iteration and returns the first match for duplicates.
    for i, text in enumerate(my_text):
        print(i)

        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            temperature=temperature,
            messages=[
                {"role": "system", "content": content},
                {
                    "role": "user",
                    "content": " here's the tweet I picked, please label it as 'Relevant' or 'Irrelevant':" + text,
                },
            ],
        )
        reply = completion.choices[0].message.content
        label.append(reply)
        text_list.append(text)
        print(reply)

    df_label = pd.DataFrame()
    df_label['text'] = text_list
    df_label['label'] = label

    # Keep a copy of the raw replies on disk, one file per round.
    df_label.to_csv(path+'label_task1_round_{0}.csv'.format(k))

    return df_label


# In[ ]:


def find_label(text):
    """Turn a raw model reply into a relevance label.

    The match is case-insensitive, so replies such as "RELEVANT" or
    "Irrelevant." are recognised as well.

    Args:
        text: The reply string returned by the model.

    Returns:
        False if the reply mentions "irrelevant", True if it mentions
        "relevant", and -1 if it mentions neither.
    """
    lowered = text.lower()
    # "irrelevant" contains "relevant", so it has to be tested first.
    if 'irrelevant' in lowered:
        return False
    if 'relevant' in lowered:
        return True
    # Neither word present: keep the original "unknown" sentinel.
    return -1


def find_intersections(x,y):
    """Return 1 when ``x`` and ``y`` carry the same boolean label, else 0.

    Agreement means both values equal False or both equal True. Equality
    (not identity) is used on purpose, so the integers 0 and 1 count as
    False and True. Any other value, such as -1, never matches.
    """
    # Check the False pair first, then the True pair.
    agree = any(x == flag and y == flag for flag in (False, True))
    return int(agree)


# In[ ]:


def evaluate(df_label):
    """Compare one round of ChatGPT labels with the stored reference labels.

    Reads ``training_data_reduced_readytouse.csv`` from ``path``, places
    ``df_label`` next to it column-wise (rows are matched on the index),
    converts the raw replies with ``find_label`` and marks each row where the
    result agrees with the ``relevant`` column. The label counts and the
    agreement share are printed.

    Args:
        df_label: DataFrame with a ``label`` column holding the raw model
            replies, as returned by the ``run_chatgpt_*`` functions.

    Returns:
        The combined DataFrame, with ``label`` replaced by the parsed value
        (True / False / -1) and an added ``intersection`` column (1 = agree,
        0 = disagree).
    """
    df = pd.read_csv(path+'training_data_reduced_readytouse.csv')
    print(df.shape)

    # axis must be given by keyword: pandas 2.0 made every concat argument
    # after ``objs`` keyword-only, so ``pd.concat([...], 1)`` raises TypeError.
    df_final1 = pd.concat([df, df_label], axis=1)

    # Rows without a model reply cannot be parsed, so drop them first.
    df_final1 = df_final1.dropna(subset=['label'])
    df_final1['label'] = df_final1['label'].apply(find_label)
    df_final1['intersection'] = df_final1.apply(
        lambda x: find_intersections(x['label'], x['relevant']), axis=1
    )

    print('****************************\n ChatGPT labelling value count:')
    print('label: \n',df_final1['label'].value_counts())

    print('****************************\n intersection with RA labelling:')
    print(df_final1['intersection'].value_counts(normalize=True))

    return df_final1



def compare(df1,df2,l1,l2):
    """Measure how often two labelling rounds agree.

    Args:
        df1: DataFrame containing the label column named ``l1``.
        df2: DataFrame containing the label column named ``l2``.
        l1: Name of the label column taken from ``df1``.
        l2: Name of the label column taken from ``df2``.

    Returns:
        Series with the share of rows where the two labels agree (index 1)
        and disagree (index 0), as produced by
        ``value_counts(normalize=True)``.
    """
    # axis must be given by keyword: pandas 2.0 made every concat argument
    # after ``objs`` keyword-only, so ``pd.concat([...], 1)`` raises TypeError.
    df_compare = pd.concat([df1, df2], axis=1)

    df_compare['label_intersection'] = df_compare.apply(
        lambda x: find_intersections(x[l1], x[l2]), axis=1
    )

    return df_compare['label_intersection'].value_counts(normalize=True)


# In[ ]:


# Default temperature: two independent rounds, each scored against the
# reference labels, then compared with each other.
print('\n Task 1: Round 1 with default temperature:')
df_label1 = run_chatgpt_default(my_text, 1)
df_final1 = evaluate(df_label1).rename(columns={'label': 'label1'})

print('\n Task 1: Round 2 with default temperature:')
df_label2 = run_chatgpt_default(my_text, 2)
df_final2 = evaluate(df_label2).rename(columns={'label': 'label2'})

compare_default = compare(df_final1, df_final2, 'label1', 'label2')
print('****************************\n intersection of two rounds of labelling with default temperature:')
print(compare_default)


# Temperature 0.2: the same procedure for rounds 3 and 4.
print('\n Task 1: Round 3 with temperature=0.2:')
df_label3 = run_chatgpt_temperature(my_text, 3)
df_final3 = evaluate(df_label3).rename(columns={'label': 'label3'})

print('\n Task 1: Round 4 with temperature=0.2:')
df_label4 = run_chatgpt_temperature(my_text, 4)
df_final4 = evaluate(df_label4).rename(columns={'label': 'label4'})

compare_temperature = compare(df_final3, df_final4, 'label3', 'label4')
print('****************************\n intersection of two rounds of labelling with temperature=0.2:')
print(compare_temperature)


# In[ ]:




